import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np
from scipy import stats
import sys
sys.path.append(sys.argv[1])



import pandas as pd  
import pickle 
from decision_company import read_csv_file

# Load the dataset from the directory given as the first CLI argument.
# NOTE: the counts used below are hard-coded rather than derived from this
# frame; the load is retained to preserve the script's original behavior.
credit_customers = read_csv_file(os.path.join(sys.argv[1], 'credit_customers.csv'))

# Given customer counts for the two employment categories being compared
customers_1_to_4_years = 339
customers_7_or_more_years = 253

# Given counts that make up the rest of the total
# (presumably the remaining employment categories; labels are not recorded here)
other_category_counts = (174, 172, 62)

# Total number of customers across all categories
total_customers = customers_1_to_4_years + customers_7_or_more_years + sum(other_category_counts)

# Gap between the two categories expressed as a share of ALL customers
# (a difference in percentage points, not a change relative to either group)
percentage_difference = ((customers_1_to_4_years - customers_7_or_more_years) / total_customers) * 100

print(f"The percentage difference between customers with 1 to 4 years of employment and those with 7 or more years of employment is {percentage_difference:.2f}%.")

# Persist the result; the context manager guarantees the file is flushed and
# closed even if pickling raises, instead of leaking the handle.
with open("./ref_result/percentage_difference.pkl", "wb") as result_file:
    pickle.dump(percentage_difference, result_file)